## load packages
library(tm.plugin.factiva); library(quanteda); library(tm); library(readtext); library(tidytext); library(stringr); library(wordcloud); library(ggplot2); library(lubridate); library(tidyr)

## read in Factiva data
# The Factiva export is split over 18 HTML files (articles_1.html ...
# articles_18.html). Each file is read the same way and the per-file corpora
# are concatenated in file order into `full_corpus`.
# Building the paths programmatically avoids copy-paste slips: the previous
# version built corpus16 from source11, so file 11 was loaded twice and file 16
# was never read.
article_dir <- "~/Dropbox/australia-frames/newspaper-articles/articles"
article_files <- file.path(article_dir, paste0("articles_", seq_len(18), ".html"))

corpus_parts <- lapply(article_files, function(path) {
  Corpus(FactivaSource(path))
})
full_corpus <- do.call(c, corpus_parts)

## collapse multi-word expressions into single underscore-joined tokens
# Job titles that merely contain "international law" are deleted first so that
# they are not counted as references to international law itself.
drop_phrases <- c(
  "international law professor",
  "professor of international law",
  "lecturer in international law",
  "lecturer of international law",
  "international law lecturer"
)

# pattern = replacement. Applied in this order, after `drop_phrases`.
# Matching is case-insensitive and substring-based (no word boundaries).
join_phrases <- c(
  "international law"      = "international_law",
  "refugees convention"    = "refugee_convention",
  "convention on refugees" = "convention_on_refugees",
  "refugee convention"     = "refugee_convention",
  "legal obligation"       = "legal_obligation",
  "legal duty"             = "legal_duty",
  # The dictionary looks up "legal_responsibility"; without this substitution
  # that entry could never match.
  "legal responsibility"   = "legal_responsibility",
  "international legal"    = "international_legal",
  "duty of care"           = "duty_of_care",
  "moral obligation"       = "moral_obligation",
  "moral duty"             = "moral_duty",
  "global citizen"         = "global_citizen",
  "international pariah"   = "international_pariah",
  "international standing" = "international_standing",
  "national character"     = "national_character"
)

# Apply all deletions, then all joins, to a character vector of article text.
normalise_phrases <- function(text) {
  for (pattern in drop_phrases) {
    text <- gsub(pattern, "", text, ignore.case = TRUE)
  }
  for (pattern in names(join_phrases)) {
    text <- gsub(pattern, join_phrases[[pattern]], text, ignore.case = TRUE)
  }
  text
}

# Iterate over however many documents were actually read instead of a
# hard-coded document count.
for (i in seq_len(length(full_corpus))) {
  full_corpus[[i]]$content <- normalise_phrases(full_corpus[[i]]$content)
}

## tidy: one row per article, metadata as columns
full_corpus_td <- tidy(full_corpus)

## publication-date docvars: calendar month number and a "YYYY-MM" label
full_corpus_td[["month"]] <- month(full_corpus_td[["datetimestamp"]])
full_corpus_td[["month_year"]] <- format(
  as.Date(full_corpus_td[["datetimestamp"]]),
  "%Y-%m"
)

## turn the data frame into a quanteda corpus
# (the name is reused: from here on `full_corpus_td` is a corpus object)
full_corpus_td <- corpus(full_corpus_td)

## tokenise, dropping punctuation
full_corpus_tokens <- tokens(full_corpus_td, remove_punct = TRUE)

## document-feature matrix of the tokens
full_corpus_tokens_dfm <- dfm(full_corpus_tokens)

## remove phrases where "convention" does not refer to the Refugee Convention
removewords <- c(
  'convention centre*', 'national convention*', 'annual convention*',
  'the convention whereby', 'convention by refusing', 'party convention',
  'climate change convention', 'ALP Convention', 'National Convention',
  'convention was held'
)

# These are multi-word patterns, so they have to be matched against token
# sequences: a dfm built from single tokens has no feature containing a space,
# which means dfm_remove() with these patterns never removed anything.
# phrase() makes quanteda treat each whitespace-separated pattern as a
# sequence. The phrases are removed before stop words because some patterns
# contain stop words themselves ("the", "by", "was").
# Reassigning `full_corpus_tokens` makes the exclusion apply to every later
# step that works from the tokens.
full_corpus_tokens <- tokens_remove(full_corpus_tokens, phrase(removewords))

# Rebuild the dfm from the filtered tokens, then drop English stop words.
full_corpus_tokens_dfm <- dfm(full_corpus_tokens)
full_corpus_tokens_dfm <- dfm_remove(full_corpus_tokens_dfm, stopwords("en"))

## creating dictionary
# Three frames. Entries containing "_" are multi-word expressions that the
# phrase-substitution step earlier in this script joins into single tokens, so
# they must be spelled with underscores here to match.
dict <- dictionary(list(
  international_law = c(
    "ratify", "ratified", "ratification", "international_law", "convention",
    "refugee_convention", "convention_on_refugees", "treaty",
    # duplicate "legal_obligation" entry removed
    "legal_obligation", "legal_duty", "legal_responsibility",
    "international_legal"
  ),
  moral = c(
    "moral", "compassion", "decent", "repugnant", "decency", "unethical",
    "ethical", "duty_of_care", "moral_duty",
    # was "moral obligation" (with a space), which cannot match the joined
    # token "moral_obligation" produced by the substitution step
    "moral_obligation",
    "morally"
  ),
  reputation = c(
    "embarrass", "global_citizen", "reputation", "embarrassing",
    "international_pariah",
    # joined by the substitution step but previously missing from every key
    "international_standing",
    "national_character"
  )
))

## dictionary lookup (by month)
# Replace every token that matches a dictionary entry with its frame key.
full_corpus_dict_lookup <- tokens_lookup(full_corpus_tokens, dictionary = dict)

# Count frame keys per document, then sum the counts within each "YYYY-MM"
# value of the `month_year` docvar. The argument is spelled `groups` in full
# rather than relying on partial matching of `group`.
frames_by_date <- dfm_group(
  dfm(full_corpus_dict_lookup),
  groups = month_year,
  fill = TRUE
)

## plot dictionary lookup by month
# Long format: one row per (month, frame) with columns document, term, count.
frames_by_date <- tidy(frames_by_date)
# `document` holds the "YYYY-MM" group label; anchor each month on its first day.
frames_by_date$date <- as.Date(paste0(frames_by_date$document, '-01'), format = "%Y-%m-%d")

# Bind colours and legend labels to the frames by name. With positional
# labels/values, a frame with no matches would shift or break the legend.
frame_keys <- c("international_law", "moral", "reputation")
frame_labels <- c('International Law', 'Moral', 'Reputational')
frame_colours <- c(international_law = "blue", moral = "grey30", reputation = "red")

# NOTE(review): `count` is the number of dictionary matches per month, not the
# number of articles, so the y-axis label may be misleading. Confirm which was
# intended before publishing.
plot1 <- ggplot(data = frames_by_date, aes(date, count)) +
  geom_line(aes(color = term)) +
  theme_classic() +
  theme(legend.title = element_blank()) +
  theme(legend.position = "bottom") +
  ylab("Number of Articles by Month") +
  xlab("") +
  scale_color_manual(
    breaks = frame_keys,
    labels = frame_labels,
    values = frame_colours
  )
plot1
ggsave("~/Dropbox/australia-frames/newspaper-articles/plot_by_month.png", plot1,  dpi = 300)
